################################################################################ 
#
# The distribution of hate speech and its implications for content moderation
# PSRM - Replication package
# Table D4
#
################################################################################ 

library(dplyr)
library(readr)
library(kableExtra)
library(tidyr)

# Clear the workspace, keeping only objects whose name starts with "wd"
# (e.g. wd_data / wd_res used below) or is exactly "setsave".
rm(list = setdiff(ls(), ls(pattern = "^wd|^setsave$")))

# CH
## this code estimates metrics with a stratified estimator after having annotated the sample produced above
sample_annotated_ch <- readRDS(paste0(wd_data,"/twitter_classifier_validation/ch_sample_to_annotate_final_anon.rds"))
sample_annotations_pi <- readRDS(paste0(wd_data,"/twitter_classifier_validation/ch_sample_to_annotate_pi_anon.rds"))
sample_annotated_ch_persp <- readRDS(file = paste0(wd_data,"/twitter_classifier_validation/ch_sample_coded_perspective_anon.rds"))
sample_annotated_ch_llama <- readRDS(paste0(wd_data,"/twitter_classifier_validation/ch_sample_annotated_llama3_v1_anon.rds"))
sample_annotated_ch_deepseek_14b <- readRDS(paste0(wd_data,"/twitter_classifier_validation/ch_sample_annotated_deepseek_r1_qwen_14b_v1_anon.rds"))
sample_annotated_ch_deepseek_32b <- readRDS(paste0(wd_data,"/twitter_classifier_validation/ch_sample_annotated_deepseek_r1_qwen_32b_v1_anon.rds"))


# Perspective score used for classification: row-wise maximum of the
# identity-attack, severe-toxicity and toxicity scores. pmax() is vectorised
# and avoids coercing the data frame to a matrix (NA in any score gives NA,
# exactly as max() did).
sample_annotated_ch_persp$MAX_IDENTITY_BOTH_TOX <- pmax(
  sample_annotated_ch_persp[["IDENTITY_ATTACK"]],
  sample_annotated_ch_persp[["SEVERE_TOXICITY"]],
  sample_annotated_ch_persp[["TOXICITY"]]
)

# Column overview; only prints when run interactively / with echo.
names(sample_annotations_pi)

# Gold-standard label: 1 = annotated as hate speech, 0 = not (NA stays NA).
sample_annotated_ch$hate_speech_human <- ifelse(sample_annotated_ch$is_hate_speech_s == 1, 1, 0) # & is.na(sample_annotated_ch$target_group_s) == F

# The columns below are attached by row position, so every file must describe
# the same tweets in the same order. Row order cannot be verified from here,
# but a differing row count is caught before anything is silently recycled.
stopifnot(
  nrow(sample_annotated_ch) == nrow(sample_annotations_pi),
  nrow(sample_annotated_ch_persp) == nrow(sample_annotations_pi),
  nrow(sample_annotated_ch_llama) == nrow(sample_annotations_pi),
  nrow(sample_annotated_ch_deepseek_14b) == nrow(sample_annotations_pi),
  nrow(sample_annotated_ch_deepseek_32b) == nrow(sample_annotations_pi)
)

insample <- sample_annotations_pi
insample$hate_speech_human <- sample_annotated_ch$hate_speech_human
insample$MAX_IDENTITY_BOTH_TOX <- sample_annotated_ch_persp$MAX_IDENTITY_BOTH_TOX
insample$is_hate_speech_llama <- sample_annotated_ch_llama$is_hate_speech_llama
insample$is_hate_speech_deepseek_r1_qwen_14b <- sample_annotated_ch_deepseek_14b$is_hate_speech_deepseek_r1_qwen_14b
insample$is_hate_speech_deepseek_r1_qwen_32b <- sample_annotated_ch_deepseek_32b$is_hate_speech_deepseek_r1_qwen_32b


# Keep only rows flagged tweet_text_na == FALSE (rows where the flag is NA are
# dropped by filter()). FALSE is spelled out because `F` is an ordinary,
# reassignable variable and could even be masked by a data column.
insample <- insample %>% filter(tweet_text_na == FALSE)

hate_speech_truth <- "hate_speech_human" #this variable should be 0 for non hate and 1 for hate as per the gold standard "truth"
classifying_var <- "prob_hatespeech" #this should be the variable on the basis of which classification and stratification happened
classifying_var_ii <- "MAX_IDENTITY_BOTH_TOX"
classifying_var_iii <- "is_hate_speech_llama"
classifying_var_iv <- "is_hate_speech_deepseek_r1_qwen_14b"
classifying_var_v <- "is_hate_speech_deepseek_r1_qwen_32b"

# A value counts as a positive prediction when it is >= the threshold.
threshold <- 0.85 # threshold used for classification
threshold_ii <- 0.8 # since this is also a perspective classification we also use 0.8 like in the US
# The LLM outputs are binary (0/1) per the original note, so any threshold in
# (0, 1] gives the same classification; 0.8 is kept for uniformity.
threshold_iii <- 0.8
threshold_iv <- 0.8
threshold_v <- 0.8


## define functions for stratified sampling estimators 
## Classification metrics from the four cells of a 2x2 confusion matrix.
##
## Cell naming: `trX_predY` = truth X, prediction Y, with 1 the positive class.
##   tr0_pred0  true negatives      tr1_pred0  false negatives
##   tr0_pred1  false positives     tr1_pred1  true positives
##
## The cells may be (weighted) proportions or raw counts: accuracy and the
## support-weighted F1 are normalised by the cell total, and every other
## metric is scale-invariant. For cells that sum to 1 the results are the
## same as without normalisation.
##
## Returns a data.frame (one row per element of the inputs) with columns
## precision, recall, f1, accuracy, MCC, Kappa, F1_unweighted, F1_weighted
## and BM (recall + specificity - 1).
do_metrics <- function(tr0_pred0, tr1_pred0, tr0_pred1, tr1_pred1){
  # Work in double precision: integer counts (e.g. from table()) would
  # overflow to NA in the products and sums used for MCC and kappa.
  tr0_pred0 <- as.numeric(tr0_pred0)
  tr1_pred0 <- as.numeric(tr1_pred0)
  tr0_pred1 <- as.numeric(tr0_pred1)
  tr1_pred1 <- as.numeric(tr1_pred1)
  total <- tr0_pred0 + tr1_pred0 + tr0_pred1 + tr1_pred1

  # Harmonic mean of precision and recall for one class. With no hits but at
  # least one error the F-score is 0 (2*hits / (2*hits + errors)), even though
  # precision or recall alone may be 0/0 and would otherwise propagate NA.
  # With no hits and no errors the score is genuinely undefined (NaN).
  f_score <- function(hits, errors, prec, rec) {
    ifelse(hits == 0 & errors > 0, 0, 2 / ((1 / prec) + (1 / rec)))
  }
  errors <- tr0_pred1 + tr1_pred0

  # Positive class
  precision <- tr1_pred1 / (tr1_pred1 + tr0_pred1)
  recall <- tr1_pred1 / (tr1_pred1 + tr1_pred0)
  f1 <- f_score(tr1_pred1, errors, precision, recall)

  # Negative class (roles of 0 and 1 swapped); recall_rev is the specificity
  precision_rev <- tr0_pred0 / (tr0_pred0 + tr1_pred0)
  recall_rev <- tr0_pred0 / (tr0_pred0 + tr0_pred1)
  f1_neg <- f_score(tr0_pred0, errors, precision_rev, recall_rev)

  accuracy <- (tr0_pred0 + tr1_pred1) / total

  # Matthews correlation coefficient; 0/0 (NaN) when any marginal is empty
  MCC <- (tr1_pred1*tr0_pred0 - tr1_pred0*tr0_pred1) /
    sqrt((tr1_pred1+tr0_pred1)*(tr1_pred1+tr1_pred0)*(tr0_pred0+tr0_pred1)*(tr0_pred0+tr1_pred0))

  # Cohen's kappa, written directly in terms of the four cells
  kappa <- 2*(tr1_pred1*tr0_pred0 - tr0_pred1*tr1_pred0) /
    ((tr1_pred1+tr0_pred1)*(tr0_pred1+tr0_pred0) + (tr1_pred1+tr1_pred0)*(tr0_pred0+tr1_pred0))

  # Macro average and support-weighted average of the two per-class F-scores
  f1_unweighted <- (f1 + f1_neg) / 2
  f1_weighted <- ((tr1_pred1+tr1_pred0)*f1 + (tr0_pred0+tr0_pred1)*f1_neg) / total

  bm <- recall + recall_rev - 1

  data.frame(precision=precision, recall=recall, f1=f1, accuracy=accuracy,
             MCC=MCC, Kappa=kappa, F1_unweighted=f1_unweighted,
             F1_weighted=f1_weighted, BM=bm)
}

## Weighted confusion-matrix shares for one classifier, passed to do_metrics().
##
## Arguments:
##   data       data frame holding the columns named below
##   truth      name of the gold-standard column; 0 is the negative class, any
##              other non-missing value counts as positive
##   pred       name of the classifier output column (score or 0/1 label)
##   probs      name of the column whose inverse is used as the row weight
##              (presumably the sampling/inclusion probability of each row --
##              confirm against the sampling script); NULL weights all rows
##              equally and raises a warning
##   threshold  a row is predicted positive when its `pred` value is
##              >= threshold
##
## Rows with a missing truth or prediction are dropped. Each of the four cells
## is the weighted share of the remaining rows (weights 1 / probs), so the
## cells sum to 1. Returns whatever do_metrics() returns for those shares.
our_metrics <- function(data, truth, pred, probs="Prob", threshold){
  # Fail early with a readable message instead of a subsetting error
  absent <- setdiff(c(truth, pred, probs), names(data))
  if (length(absent) > 0) {
    stop("Column(s) not found in data: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  # Read everything into local vectors. Writing helper columns (Prob, truth,
  # var1) into `data` would clobber same-named input columns before they are
  # read, e.g. pred = "truth", or pred = "Prob" together with probs = NULL.
  gold <- data[[truth]]
  score <- data[[pred]]
  if (is.null(probs)) {
    warning("No probabilities in data")
    incl_prob <- rep(1, length(gold))
  } else {
    incl_prob <- data[[probs]]
  }

  # Confusion-matrix cell of every row; NA when truth or prediction is missing
  cell <- factor(ifelse(gold == 0,
                        ifelse(score < threshold, "tr0_pred0", "tr0_pred1"),
                        ifelse(score < threshold, "tr1_pred0", "tr1_pred1")),
                 levels = c("tr0_pred0", "tr0_pred1", "tr1_pred0", "tr1_pred1"))
  keep <- !is.na(cell)
  cell <- cell[keep]
  inv_prob <- 1 / incl_prob[keep]

  # Weighted share of the retained rows that fall into one cell
  share <- function(label) weighted.mean(cell == label, inv_prob)

  do_metrics(share("tr0_pred0"), share("tr1_pred0"),
             share("tr0_pred1"), share("tr1_pred1"))
}

# Compute the metrics for every classifier exactly once.
# Each entry gives the label shown in the table, the column of `insample`
# holding the classifier output, and the decision threshold for that column.
model_specs <- list(
  # EMLNP classifier
  list(label = "Kotarcic et al. (2022)",
       pred = classifying_var, threshold = threshold),
  # Perspective
  list(label = "Perspective API",
       pred = classifying_var_ii, threshold = threshold_ii),
  # LLaMA 3 8b (fine-tuned on 500 expert-annotated comments)
  list(label = "LLaMA 3 8 b",
       pred = classifying_var_iii, threshold = threshold_iii),
  # DeepSeek R1 Qwen 14b (fine-tuned on 500 expert-annotated comments)
  list(label = "DeepSeek R1 Qwen 14 b",
       pred = classifying_var_iv, threshold = threshold_iv),
  # DeepSeek R1 Qwen 32b (8bit) (fine-tuned on 500 expert-annotated comments)
  list(label = "DeepSeek R1 Qwen 32 b (8-bit)",
       pred = classifying_var_v, threshold = threshold_v)
)

# One row of metrics per model, in the order listed above
df_all <- bind_rows(lapply(model_specs, function(spec) {
  our_metrics(insample,
              truth     = hate_speech_truth,
              pred      = spec$pred,
              threshold = spec$threshold) %>%
    mutate(Model = spec$label)
}))

# Show the full set of metrics (including those not reported in the table).
# Printed explicitly because bare top-level expressions are not auto-printed
# when the script is run through source().
print(df_all)

# Keep the columns reported in Table D4 and give them their display names
df_clean <- df_all %>%
  select(
    Model,
    Precision   = precision,
    Recall      = recall,
    `F1 score`  = f1,
    Accuracy    = accuracy,
    `F1 (Weighted)` = F1_weighted
  )

# Create the output directory if needed so the two writes below do not fail
# on a fresh checkout; a no-op when it already exists.
dir.create(paste0(wd_res, "/tables"), recursive = TRUE, showWarnings = FALSE)

# Save as CSV
write_csv(df_clean, paste0(wd_res,"/tables/table_d4.csv"))
cat("\n====================\n")
cat("Saved Table D4")
cat("\n====================\n")

# Build the LaTeX version of the table with kableExtra
# (one left-aligned label column followed by five centred metric columns)
latex_tbl <- df_clean %>%
  kable(
    format    = "latex",
    booktabs  = TRUE,
    align     = c("l", rep("c", 5)),
    caption   = "Comparison of baseline classifiers with fine‐tuned LLMs (Swiss Tweets validation set)",
    col.names = names(df_clean)
  ) %>%
  kable_styling(
    latex_options = c("hold_position", "striped"),
    font_size     = 10
  )

# Echo the LaTeX source to the console and write it next to the CSV
cat(latex_tbl)
save_kable(latex_tbl, paste0(wd_res,"/tables/table_d4.tex"))
